Code
# Packages ----
# Each package is attached once; the original listed the block twice and
# fused two calls onto one line, which does not parse.
library(tidyverse)
library(here)
library(knitr)
library(kableExtra)
library(DT)

# Data ----
# Read the state-level records for names starting with "A" and rename the
# Gender column to Sex_at_Birth, the name used by every later chunk.
a_names <- read_csv(
  here::here("supporting_artifacts", "learning_targets", "StateNames_A.csv")
) |>
  rename(Sex_at_Birth = Gender)
# Interactive table of every record in a_names, with per-column filters on
# top and editable cells.
# NOTE(review): colnames are applied by position, so this assumes a_names has
# exactly five columns in the order name, year, sex, state, count -- confirm.
datatable(
  a_names,
  colnames = c(
    "Baby Names",
    "Year Observed",
    "Sex Assigned at Birth",
    "State Observed",
    "Count of Babies with Name"
  ),
  class = "cell-border stripe",
  caption = "Table of Baby Names that Start With the Letter 'A'",
  filter = "top",
  editable = "cell"
)

# Total number of babies named "Allison" per state, one column per sex.
allison_both_sexes <- a_names |>
  filter(Name == "Allison") |>
  group_by(Sex_at_Birth, State) |>
  # Drop the grouping so pivot_wider() receives a plain tibble.
  summarize(sex_count = sum(Count), .groups = "drop") |>
  pivot_wider(
    names_from = Sex_at_Birth,
    values_from = sex_count,
    # A state with no records for one sex gets 0 rather than NA.
    values_fill = 0
  )

kable(
  allison_both_sexes,
  format = "pipe",
  col.names = c(
    "State Observed",
    "Count of Female Babies",
    "Count of Male Babies"
  ),
  align = "c",
  caption = "Count of Female and Male Babies Named 'Allison' Per State"
)
| State Observed | Count of Female Babies | Count of Male Babies |
|---|---|---|
| AK | 232 | 0 |
| AL | 1535 | 0 |
| AR | 1198 | 0 |
| AZ | 1880 | 0 |
| CA | 12413 | 0 |
| CO | 1594 | 0 |
| CT | 1099 | 0 |
| DC | 321 | 0 |
| DE | 294 | 0 |
| FL | 4455 | 0 |
| GA | 3257 | 0 |
| HI | 183 | 0 |
| IA | 1477 | 0 |
| ID | 451 | 0 |
| IL | 5110 | 0 |
| IN | 3067 | 0 |
| KS | 1283 | 0 |
| KY | 1905 | 20 |
| LA | 1209 | 0 |
| MA | 2218 | 0 |
| MD | 2229 | 0 |
| ME | 340 | 0 |
| MI | 4014 | 0 |
| MN | 2374 | 0 |
| MO | 2882 | 0 |
| MS | 817 | 0 |
| MT | 226 | 0 |
| NC | 3435 | 0 |
| ND | 285 | 0 |
| NE | 807 | 0 |
| NH | 412 | 0 |
| NJ | 3052 | 0 |
| NM | 399 | 0 |
| NV | 729 | 0 |
| NY | 5747 | 0 |
| OH | 5487 | 0 |
| OK | 1421 | 0 |
| OR | 1186 | 0 |
| PA | 4307 | 0 |
| RI | 306 | 0 |
| SC | 1228 | 0 |
| SD | 376 | 0 |
| TN | 2488 | 0 |
| TX | 10192 | 0 |
| UT | 1125 | 0 |
| VA | 3220 | 0 |
| VT | 135 | 0 |
| WA | 1956 | 0 |
| WI | 2367 | 0 |
| WV | 813 | 0 |
| WY | 142 | 0 |
# Per-state totals of female babies named "Allison".
allison_females <- a_names |>
  filter(Name == "Allison", Sex_at_Birth == "F") |>
  group_by(Sex_at_Birth, State) |>
  summarize(sex_count = sum(Count), .groups = "drop")

# Yearly totals of babies named "Allison", summed over all states and sexes.
allison <- a_names |>
  filter(Name == "Allison") |>
  group_by(Year) |>
  summarize(a_count = sum(Count), .groups = "drop")

# Yearly count as a connected scatterplot with roughly ten breaks per axis.
ggplot(data = allison, mapping = aes(x = Year, y = a_count)) +
  geom_point() +
  geom_line() +
  labs(
    x = "Year Observed",
    y = "",
    title = "Count of 'Allison'"
  ) +
  scale_x_continuous(n.breaks = 10) +
  scale_y_continuous(n.breaks = 10)
# https://www.statology.org/ggplot-axis-ticks/

# Simple linear regression of the yearly count on year. The assignment sits
# on its own line; it was previously fused onto the URL comment above and so
# never executed.
allison_lm <- allison |>
  lm(a_count ~ Year, data = _)
allison_lm
Call:
lm(formula = a_count ~ Year, data = allison)
Coefficients:
(Intercept) Year
209815.1 -101.6
# Scatterplot of the yearly "Allison" count with a least-squares line.
# Year is mapped to x and the count to y so that the smoother fits
# a_count ~ Year (the same model as allison_lm) and the "Year Observed"
# label sits on the axis that actually shows the year.
allison |>
  ggplot(aes(x = Year, y = a_count)) +
  geom_point() +
  stat_smooth(method = "lm") +
  labs(
    x = "Year Observed",
    y = "",
    title = "Count of 'Allison'"
  )
`geom_smooth()` using formula 'y ~ x'

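Fitted regression line: $\hat{y} = 209815.1 - 101.6x$, where $x$ is the year observed and $\hat{y}$ is the predicted count of babies named “Allison”.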
# Residuals of allison_lm plotted against year, with roughly ten x-axis
# breaks.
ggplot(
  data = broom::augment(allison_lm),
  mapping = aes(x = Year, y = .resid)
) +
  geom_point() +
  scale_x_continuous(n.breaks = 10) +
  labs(
    title = "Residuals",
    x = "Year Observed",
    y = ""
  )
The only significant pattern I see is that starting in 2011, the residuals begin to increase in a linear way.
Based on this model, the number of children being named “Allison” increased relative to what the model predicted, resulting in a positive linear pattern in the residuals. When this data ends in 2014, your name wasn’t as cool as it was in 1998 or 2009, but if this pattern has continued, it is possible that your name is getting more and more cool again.
# Yearly counts of male babies for each spelling (Allan, Alan, Allen),
# drawn as one coloured line per spelling.
a_names |>
  filter(
    Sex_at_Birth == "M",
    Name %in% c("Allan", "Alan", "Allen")
  ) |>
  group_by(Name, Year) |>
  summarize(al_count = sum(Count), .groups = "drop") |>
  ggplot(mapping = aes(x = Year, y = al_count, color = Name)) +
  geom_point() +
  geom_line() +
  scale_x_continuous(n.breaks = 10) +
  scale_y_continuous(n.breaks = 10) +
  labs(
    title = "Count of Name Variant",
    x = "Year Observed",
    y = ""
  )
# Counts of male babies named Alan, Allan or Allen in California and
# Pennsylvania in the year 2000: one row per state, one column per spelling.
als_PACA_2000 <- a_names |>
  filter(
    Name %in% c("Allan", "Alan", "Allen"),
    Year == 2000, # compare as a number rather than against the string "2000"
    State %in% c("PA", "CA"),
    Sex_at_Birth == "M"
  ) |>
  group_by(State, Name) |>
  summarize(al_counts = sum(Count), .groups = "drop") |>
  pivot_wider(
    names_from = Name,
    values_from = al_counts
  )

# Headers are positional: State, then the spellings in the order
# pivot_wider() produced them (Alan, Allan, Allen in the rendered output).
kable(
  als_PACA_2000,
  format = "pipe",
  col.names = c(
    "State Observed",
    "Count of 'Alan' Babies",
    "Count of 'Allan' Babies",
    "Count of 'Allen' Babies"
  ),
  align = "c",
  caption = "Count of Babies Named 'Alan/Allan/Allen' Per State"
)
| State Observed | Count of ‘Alan’ Babies | Count of ‘Allan’ Babies | Count of ‘Allen’ Babies |
|---|---|---|---|
| CA | 579 | 131 | 176 |
| PA | 51 | 12 | 56 |
# Express `row` as a percentage of `total`, rounded to two decimal places.
# Both arguments must be numeric; anything else raises an error.
convert_percent <- function(row, total) {
  stopifnot(is.numeric(row), is.numeric(total))
  share <- row / total
  round(share * 100, digits = 2)
}
# Within-state share (in percent) of each spelling -- Alan, Allan, Allen --
# among male babies born in California and Pennsylvania in 2000.
al_percent <- a_names |>
  filter(
    Name %in% c("Allan", "Alan", "Allen"),
    Year == 2000, # compare as a number rather than against the string "2000"
    State %in% c("PA", "CA"),
    Sex_at_Birth == "M"
  ) |>
  group_by(State, Name) |>
  summarize(al_count = sum(Count), .groups = "drop") |>
  pivot_wider(
    names_from = Name,
    values_from = al_count
  ) |>
  # Row-wise so that sum() totals the three spellings within each state.
  rowwise() |>
  mutate(
    total_count = sum(c_across(Alan:Allen)),
    across(Alan:Allen, \(n) convert_percent(n, total_count))
  ) |>
  # Leave row-wise mode and drop the helper column using dplyr verbs instead
  # of base subset(), so the result is an ordinary tibble.
  ungroup() |>
  select(-total_count)

kable(
  al_percent,
  format = "pipe",
  col.names = c(
    "State Observed",
    "Percent of 'Alan' Babies",
    "Percent of 'Allan' Babies",
    "Percent of 'Allen' Babies"
  ),
  align = "c",
  caption = "Percent of Babies Named 'Alan/Allan/Allen' Per State"
)
| State Observed | Percent of ‘Alan’ Babies | Percent of ‘Allan’ Babies | Percent of ‘Allen’ Babies |
|---|---|---|---|
| CA | 65.35 | 14.79 | 19.86 |
| PA | 42.86 | 10.08 | 47.06 |
# Same percentage table rendered with kableExtra styling: striped, hover,
# condensed and bordered rows, left-aligned, in Times New Roman.
al_percent |>
  kbl(
    col.names = c(
      "State Observed",
      "Percent of 'Alan' Babies", # stray trailing apostrophe removed
      "Percent of 'Allan' Babies",
      "Percent of 'Allen' Babies"
    ),
    caption = "Percent of Babies Named 'Alan/Allan/Allen' Per State",
    align = "c"
  ) |>
  kable_styling(
    bootstrap_options = c("striped", "hover", "condensed", "bordered"),
    position = "left",
    html_font = "Times New Roman"
  )
| State Observed | Percent of 'Alan' Babies | Percent of 'Allan' Babies | Percent of 'Allen' Babies |
|---|---|---|---|
| CA | 65.35 | 14.79 | 19.86 |
| PA | 42.86 | 10.08 | 47.06 |